1 CONFIGURATIONS

# Set the working directory and tool paths on your local computer.
WD <- '/Users/Alec/motrpac/20210826_pass1c-umich'
# Set the gsutil path

knitr::opts_knit$set(root.dir=WD)
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(message = FALSE)
knitr::opts_chunk$set(cache = FALSE)
1.0.0.0.1 CSS Top Styling
write_css = FALSE
if(write_css){
  writeLines("td, th { padding : 3px } th { background-color:white; color:black; border:1px solid black; text-align:center } td {color:black; border:1px solid black; word-wrap:break-word; white-space:nowrap; overflow: hidden; text-overflow: ellipsis; max-width:300px; text-align:left}", con=file.path(normalizePath(WD), "style/style.css"))
}
1.0.0.0.2 Overview
1.0.0.0.3 Goals of Analysis
  • Examine data dimensions
  • Examine zero, negative, and missing Values
  • Impute missing values
  • Visualize sample-by-sample correlations
  • Identify outliers
  • Identify major causes of variance (drift, sex, control group, hour)
  • Visualize sample-by-feature heatmaps under different transformations
  • Examine sample median and sd distributions between each transformation
  • Transform data
  • Remove outlier samples
  • Save the processed data

2 Prepare Environment & Load Data

2.1 Setup the Environment

################################################################################
##### Resources and Dependencies ###############################################
################################################################################
# Whether to knit document and display data
knit_time = TRUE

# Load dependencies
pacs...man <- c("tidyverse","kableExtra","devtools","MotrpacBicQC","impute","glue",
                "rethinking")
for(pac in pacs...man){
  suppressWarnings(suppressPackageStartupMessages(library(pac, character.only = TRUE)))
}

#browseVignettes("MotrpacBicQC")
############################################################
##### Functions ############################################
############################################################

# Name functions
select <- dplyr::select
map <- purrr::map
desc <- dplyr::desc
arrange <- dplyr::arrange
melt <- reshape2::melt
mutate <- dplyr::mutate
glue <- glue::glue

# Global options
options(dplyr.print_max = 100)
options(scipen=10000)

# Colors
redblue100<-rgb(read.table(paste0(WD,'/colors/redblue100.txt'),sep='\t',row.names=1,header=T))

2.2 Log Variables (1)

ds <- 'pass1a'
site <- 'umich'
tech <- 'rppos'
TIS <- list('PLA', 'HIP', 'GAS', 'HRT', 'KID', 'LUN', 'LIV', 'BADI', 'WADI')
tis <- list('pla', 'hip', 'gas', 'hrt', 'kid', 'lun', 'liv', 'badi', 'wadi')

2.3 Load Phenotype data

# Phenotype Data (1A)
#########################
pheno_file <- glue("{WD}/data/20211215_pass1a-06-pheno-viallabel_steep.txt")
pheno_df1a <- read.csv(pheno_file, header = T, sep = '\t')
# pheno_df1a <- pheno_df1a %>%
#   mutate(tissue = case_when(Specimen.Processing.sampletypedescription == 'Brown Adipose' ~ 'badi',
#                             Specimen.Processing.sampletypedescription == 'EDTA Plasma' ~ 'pla',
#                             Specimen.Processing.sampletypedescription == 'Gastrocnemius' ~ 'gas',
#                             Specimen.Processing.sampletypedescription == 'Heart' ~ 'hrt',
#                             Specimen.Processing.sampletypedescription == 'Kidney' ~ 'kid',
#                             Specimen.Processing.sampletypedescription == 'Liver' ~ 'liv',
#                             Specimen.Processing.sampletypedescription == 'Lung' ~ 'lun',
#                             Specimen.Processing.sampletypedescription == 'White Adipose' ~ 'wadi',
#                             Specimen.Processing.sampletypedescription == 'PaxGene RNA' ~ 'pax',
#                             Specimen.Processing.sampletypedescription == 'Hippocampus' ~ 'hip',
#                             Specimen.Processing.sampletypedescription == 'Cortex' ~ 'cor',
#                             Specimen.Processing.sampletypedescription == 'Hypothalamus' ~ 'hyp',
#                             Specimen.Processing.sampletypedescription == 'Vastus Lateralis' ~ 'vas',
#                             Specimen.Processing.sampletypedescription == 'Tibia' ~ 'tib',
#                             Specimen.Processing.sampletypedescription == 'Aorta' ~ 'aor',
#                             Specimen.Processing.sampletypedescription == 'Small Intestine' ~ 'sma',
#                             Specimen.Processing.sampletypedescription == 'Adrenals' ~ 'adr',
#                             Specimen.Processing.sampletypedescription == 'Colon' ~ 'col',
#                             Specimen.Processing.sampletypedescription == 'Spleen' ~ 'spl',
#                             Specimen.Processing.sampletypedescription == 'Testes' ~ 'tes',
#                             Specimen.Processing.sampletypedescription == 'Ovaries' ~ 'ova'))
pheno_df1a$viallabel <- as.character(pheno_df1a$viallabel)

2.4 Load Data

2.4.1 Load the sample order files

# #created in 20210910_pass1a-umich-sample-annotation_steep.Rmd
# order_file <- glue("{WD}/data/20200910_pass1a1c-sample-order_steep.txt")
# sample.order<-read.delim(order_file,header=T, sep="\t")

# Load the prior pass1a data (takes a few minutes)
pass1a_nested_file <- glue("{WD}/../20200915_metabolomics-pass1a/data/20201010_pass1a-metabolomics-countdata-nested_steep.rds")
pass1a_df <- readRDS(pass1a_nested_file)

# pla
pla_rppos_pass1a.0.order <- pass1a_df %>%
  filter(STUDY_INSTITUTE == 'University of Michigan') %>%
  filter(NAMED == 'named') %>%
  filter(DATASET == 'untargeted') %>%
  filter(METAB_FAMILY == 'rppos') %>%
  filter(TISSUE == 'plasma') %>%
  select(SAMPLE_DATA) %>%
  unnest(SAMPLE_DATA) %>%
  left_join(y = pheno_df1a, by = c('sample_id' = 'viallabel')) %>%
  select(sample_id, sample_type, sample_order, Key.anirandgroup, Registration.sex)

# hip
hip_rppos_pass1a.0.order <- pass1a_df %>%
  filter(STUDY_INSTITUTE == 'University of Michigan') %>%
  filter(NAMED == 'named') %>%
  filter(DATASET == 'untargeted') %>%
  filter(METAB_FAMILY == 'rppos') %>%
  filter(TISSUE == 'hippocampus') %>%
  select(SAMPLE_DATA) %>%
  unnest(SAMPLE_DATA) %>%
  left_join(y = pheno_df1a, by = c('sample_id' = 'viallabel')) %>%
  select(sample_id, sample_type, sample_order, Key.anirandgroup, Registration.sex)

# gas
gas_rppos_pass1a.0.order <- pass1a_df %>%
  filter(STUDY_INSTITUTE == 'University of Michigan') %>%
  filter(NAMED == 'named') %>%
  filter(DATASET == 'untargeted') %>%
  filter(METAB_FAMILY == 'rppos') %>%
  filter(TISSUE == 'gastrocnemius') %>%
  select(SAMPLE_DATA) %>%
  unnest(SAMPLE_DATA) %>%
  left_join(y = pheno_df1a, by = c('sample_id' = 'viallabel')) %>%
  select(sample_id, sample_type, sample_order, Key.anirandgroup, Registration.sex)

# hrt
hrt_rppos_pass1a.0.order <- pass1a_df %>%
  filter(STUDY_INSTITUTE == 'University of Michigan') %>%
  filter(NAMED == 'named') %>%
  filter(DATASET == 'untargeted') %>%
  filter(METAB_FAMILY == 'rppos') %>%
  filter(TISSUE == 'heart') %>%
  select(SAMPLE_DATA) %>%
  unnest(SAMPLE_DATA) %>%
  left_join(y = pheno_df1a, by = c('sample_id' = 'viallabel')) %>%
  select(sample_id, sample_type, sample_order, Key.anirandgroup, Registration.sex)

# kid
kid_rppos_pass1a.0.order <- pass1a_df %>%
  filter(STUDY_INSTITUTE == 'University of Michigan') %>%
  filter(NAMED == 'named') %>%
  filter(DATASET == 'untargeted') %>%
  filter(METAB_FAMILY == 'rppos') %>%
  filter(TISSUE == 'kidney') %>%
  select(SAMPLE_DATA) %>%
  unnest(SAMPLE_DATA) %>%
  left_join(y = pheno_df1a, by = c('sample_id' = 'viallabel')) %>%
  select(sample_id, sample_type, sample_order, Key.anirandgroup, Registration.sex)

# lun
lun_rppos_pass1a.0.order <- pass1a_df %>%
  filter(STUDY_INSTITUTE == 'University of Michigan') %>%
  filter(NAMED == 'named') %>%
  filter(DATASET == 'untargeted') %>%
  filter(METAB_FAMILY == 'rppos') %>%
  filter(TISSUE == 'lung') %>%
  select(SAMPLE_DATA) %>%
  unnest(SAMPLE_DATA) %>%
  left_join(y = pheno_df1a, by = c('sample_id' = 'viallabel')) %>%
  select(sample_id, sample_type, sample_order, Key.anirandgroup, Registration.sex)

# liv
liv_rppos_pass1a.0.order <- pass1a_df %>%
  filter(STUDY_INSTITUTE == 'University of Michigan') %>%
  filter(NAMED == 'named') %>%
  filter(DATASET == 'untargeted') %>%
  filter(METAB_FAMILY == 'rppos') %>%
  filter(TISSUE == 'liver') %>%
  select(SAMPLE_DATA) %>%
  unnest(SAMPLE_DATA) %>%
  left_join(y = pheno_df1a, by = c('sample_id' = 'viallabel')) %>%
  select(sample_id, sample_type, sample_order, Key.anirandgroup, Registration.sex)

# badi
badi_rppos_pass1a.0.order <- pass1a_df %>%
  filter(STUDY_INSTITUTE == 'University of Michigan') %>%
  filter(NAMED == 'named') %>%
  filter(DATASET == 'untargeted') %>%
  filter(METAB_FAMILY == 'rppos') %>%
  filter(TISSUE == 'brown-adipose') %>%
  select(SAMPLE_DATA) %>%
  unnest(SAMPLE_DATA) %>%
  left_join(y = pheno_df1a, by = c('sample_id' = 'viallabel')) %>%
  select(sample_id, sample_type, sample_order, Key.anirandgroup, Registration.sex)

# wadi
wadi_rppos_pass1a.0.order <- pass1a_df %>%
  filter(STUDY_INSTITUTE == 'University of Michigan') %>%
  filter(NAMED == 'named') %>%
  filter(DATASET == 'untargeted') %>%
  filter(METAB_FAMILY == 'rppos') %>%
  filter(TISSUE == 'white-adipose') %>%
  select(SAMPLE_DATA) %>%
  unnest(SAMPLE_DATA) %>%
  left_join(y = pheno_df1a, by = c('sample_id' = 'viallabel')) %>%
  select(sample_id, sample_type, sample_order, Key.anirandgroup, Registration.sex)

rm(pass1a_df)

# Create a list of annotation matrixes
rppos_pass1a.0.order <- list(pla_rppos_pass1a.0.order, hip_rppos_pass1a.0.order, gas_rppos_pass1a.0.order, 
                 hrt_rppos_pass1a.0.order, kid_rppos_pass1a.0.order, lun_rppos_pass1a.0.order, 
                 liv_rppos_pass1a.0.order, badi_rppos_pass1a.0.order, wadi_rppos_pass1a.0.order)

# Save the Sample order file
save(pla_rppos_pass1a.0.order, hip_rppos_pass1a.0.order, gas_rppos_pass1a.0.order, 
                 hrt_rppos_pass1a.0.order, kid_rppos_pass1a.0.order, lun_rppos_pass1a.0.order, 
                 liv_rppos_pass1a.0.order, badi_rppos_pass1a.0.order, wadi_rppos_pass1a.0.order,
     file = glue("{WD}/data/UM-rppos/rppos_pass1a.0.order.RData"))

2.4.2 Load the matrices as .RData files

# UM-rppos
load(file = glue("{WD}/data/UM-rppos/UM_rppos.0.RData"))
pla1a.0 <- pla_rppos_pass1a.0
hip1a.0 <- hip_rppos_pass1a.0
gas1a.0 <- gas_rppos_pass1a.0
hrt1a.0 <- hrt_rppos_pass1a.0
kid1a.0 <- kid_rppos_pass1a.0
lun1a.0 <- lun_rppos_pass1a.0
liv1a.0 <- liv_rppos_pass1a.0
badi1a.0 <- badi_rppos_pass1a.0
wadi1a.0 <- wadi_rppos_pass1a.0

# Create a list of matrices
pass1a.0 <- list(pla1a.0, hip1a.0, gas1a.0, hrt1a.0, kid1a.0, lun1a.0, liv1a.0, badi1a.0, wadi1a.0)

2.5 Remove Internal Standards

is <- list()
for(i in 1:length(pass1a.0)){
  # Remove internal standards
  is[[i]] <- colnames(pass1a.0[[i]])[grepl("istd", colnames(pass1a.0[[i]]), ignore.case = TRUE)]
  # Subset matrix
  pass1a.0[[i]] <- pass1a.0[[i]][, !colnames(pass1a.0[[i]]) %in% is]
}

2.5.1 Create Annotations

i <- 1
tmp.iter1a <- tmp.ref1a <- tmp.sample1a <- pass1a.0.nr <- list()
for(i in 1:length(rppos_pass1a.0.order)){
  print(tis[[i]])
  # Collect the distances from reference samples
  tmp.iter1a[[i]] <- rppos_pass1a.0.order[[i]] %>%
    mutate(reference = ifelse(str_sub(sample_id,1,1) == '9', 0, 1)) %>%
    mutate(drift = ifelse(sample_type == 'QC-DriftCorrection', 1, 0)) %>%
    arrange(sample_order)
  tmp.iter1a[[i]]$right_p <- 0
  for(j in 1:nrow(tmp.iter1a[[i]])){
    # set t
    if(tmp.iter1a[[i]][j,'reference'] == 1){
      t = 0
    }else if(tmp.iter1a[[i]][j,'reference'] == 0){
      t = t + 1
    }
    tmp.iter1a[[i]][j,"right_p"] <- t
  }
  t=0
  tmp.iter1a[[i]]$right_p_d <- 0
  for(j in 1:nrow(tmp.iter1a[[i]])){
    # set t
    if(tmp.iter1a[[i]][j,'drift'] == 1){
      t = 0
    }else if(tmp.iter1a[[i]][j,'drift'] == 0){
      t = t + 1
    }
    tmp.iter1a[[i]][j,"right_p_d"] <- t
  }
  t=0

  tmp.iter1a[[i]] <- tmp.iter1a[[i]] %>%
    arrange(desc(sample_order))
  tmp.iter1a[[i]]$left_p <- 0
  for(j in 1:nrow(tmp.iter1a[[i]])){
    # set t
    if(tmp.iter1a[[i]][j,'reference'] == 1){
      t = 0
    }else if(tmp.iter1a[[i]][j,'reference'] == 0){
      t = t + 1
    }
    tmp.iter1a[[i]][j,"left_p"] <- t
  }
  t=0
  tmp.iter1a[[i]]$left_p_d <- 0
  for(j in 1:nrow(tmp.iter1a[[i]])){
    # set t
    if(tmp.iter1a[[i]][j,'drift'] == 1){
      t = 0
    }else if(tmp.iter1a[[i]][j,'drift'] == 0){
      t = t + 1
    }
    tmp.iter1a[[i]][j,"left_p_d"] <- t
  }
  t=0

  tmp.iter1a[[i]] <- tmp.iter1a[[i]] %>%
    rowwise() %>%
    mutate(min_p = min(c(left_p,right_p) ,na.rm = TRUE)) %>%
    mutate(sum_p = sum(c(left_p,right_p) ,na.rm = TRUE)) %>%
    mutate(min_p_d = min(c(left_p_d,right_p_d) ,na.rm = TRUE)) %>%
    mutate(sum_p_d = sum(c(left_p_d,right_p_d) ,na.rm = TRUE)) %>%
    arrange(sample_order)
  tmp.join1a <- tmp.iter1a[[i]] %>%
    select(sample_id, sample_order, left_p, right_p, min_p, sum_p, left_p_d, right_p_d, min_p_d, sum_p_d)

  # Collect the sample order for test+ref samples
  tmp.ref1a[[i]] <- rppos_pass1a.0.order[[i]] %>%
    arrange(sample_order) %>% 
    mutate(control = ifelse(grepl('Control', Key.anirandgroup), 1, 0)) %>%
    mutate(drift = ifelse(grepl('Drift', sample_type), 1, 0)) %>%
    mutate(reference = ifelse(str_sub(sample_id,1,1) == '9', 0, 1)) %>%
    mutate(time = case_when(grepl('IPE', Key.anirandgroup) ~ 0,
                            grepl('0 hr', Key.anirandgroup) ~ 0,
                            grepl('0.5 hr', Key.anirandgroup) ~ 0.5,
                            grepl('1 hr', Key.anirandgroup) ~ 1,
                            grepl('4 hr', Key.anirandgroup) ~ 4,
                            grepl('7 hr', Key.anirandgroup) ~ 7,
                            grepl('24 hr', Key.anirandgroup) ~ 24,
                            grepl('48 hr', Key.anirandgroup) ~ 48)) %>%
    left_join(y = tmp.join1a) %>%
    select(sample_id,sample_order, Registration.sex, control, time, drift, reference,
         left_p, right_p, min_p, sum_p, left_p_d, right_p_d, min_p_d, sum_p_d)
  N2 <- tmp.ref1a[[i]][,1] %>% unlist()
  miss1 <- N2[!N2 %in% row.names(pass1a.0[[i]])] # Verify all samples are in the pass1a.0[[i]] file
  miss1
  # sample.order %>% filter(sample_id == "90750016606")
  print('Vice Versa:')
  miss2 <- row.names(pass1a.0[[i]])[!row.names(pass1a.0[[i]]) %in% N2]
  miss2
  N2 <- N2[!N2 %in% c(miss1,miss2)] # TODO: investigate why samples are missing, continue for now
  # Reorder pass1a.0[[i]] by run order
  pass1a.0[[i]] <- pass1a.0[[i]][N2,]
  all(N2 == row.names(pass1a.0[[i]])) # Must be true
  tmp.ref1a[[i]] <- tmp.ref1a[[i]] %>%
    filter(sample_id %in% N2) %>%
    select(sample_order, Registration.sex, control, time, drift, reference,
           left_p, right_p, min_p, sum_p, left_p_d, right_p_d, min_p_d, sum_p_d) %>% as.matrix()

  # Collect the sample order for test samples
  options(digits = 14)
  tmp.sample1a[[i]] <- rppos_pass1a.0.order[[i]] %>%
    filter(substr(sample_id, 1, 1) == '9') %>%
    arrange(sample_order) %>% 
    mutate(control = ifelse(grepl('Control', Key.anirandgroup), 1, 0)) %>%
    mutate(time = case_when(grepl('IPE', Key.anirandgroup) ~ 0,
                            grepl('0 hr', Key.anirandgroup) ~ 0,
                            grepl('0.5 hr', Key.anirandgroup) ~ 0.5,
                            grepl('1 hr', Key.anirandgroup) ~ 1,
                            grepl('4 hr', Key.anirandgroup) ~ 4,
                            grepl('7 hr', Key.anirandgroup) ~ 7,
                            grepl('24 hr', Key.anirandgroup) ~ 24,
                            grepl('48 hr', Key.anirandgroup) ~ 48)) %>%
    left_join(y = tmp.join1a) %>%
    mutate(sample_id = str_replace_all(sample_id, pattern = '-', replacement = '')) %>%
    mutate(sample_id = as.numeric(sample_id)) %>%
    select(sample_id, sample_order, Registration.sex, control, time, 
           left_p, right_p, min_p, sum_p, left_p_d, right_p_d, min_p_d, sum_p_d) %>%
    as.matrix()

  N1 <- row.names(pass1a.0[[i]])[substr(row.names(pass1a.0[[i]]), 1, 1) == '9']
  tmp.sample1a[[i]] <- tmp.sample1a[[i]][tmp.sample1a[[i]][,1] %in% N1,]
  # Reorder pass1a.0[[i]].nr by run order
  pass1a.0.nr[[i]] <- pass1a.0[[i]][N1,]
  tmp.sample1a[[i]][,1][!tmp.sample1a[[i]][,1] %in% row.names(pass1a.0.nr[[i]])] # Verify all samples are in the pass1a.0[[i]] file
  all(as.character(tmp.sample1a[[i]][,1]) == row.names(pass1a.0.nr[[i]]))
  # If out of order, command below will ensure pass1a.0[[i]].nr in run order
  #pass1a.0[[i]].nr <- pass1a.0[[i]].nr[as.character(tmp.sample1a[[i]][,1]),]
}
## [1] "pla"
## [1] "Vice Versa:"
## [1] "hip"
## [1] "Vice Versa:"
## [1] "gas"
## [1] "Vice Versa:"
## [1] "hrt"
## [1] "Vice Versa:"
## [1] "kid"
## [1] "Vice Versa:"
## [1] "lun"
## [1] "Vice Versa:"
## [1] "liv"
## [1] "Vice Versa:"
## [1] "badi"
## [1] "Vice Versa:"
## [1] "wadi"
## [1] "Vice Versa:"

3 Dimensions, Zero/Neg/Missing Values, & Log2

3.1 Dimensions (with reference samples)

# Lists
NR <- P <- list()
for(i in 1:length(pass1a.0)){
  print(tis[[i]])
  NR[[i]] <- dim(pass1a.0[[i]])[1]
  P[[i]] <- dim(pass1a.0[[i]])[2]
  print(dim(pass1a.0[[i]]))
}
## [1] "pla"
## [1] 101 190
## [1] "hip"
## [1] 117 154
## [1] "gas"
## [1]  99 145
## [1] "hrt"
## [1] 120 144
## [1] "kid"
## [1] 120 166
## [1] "lun"
## [1] 120 193
## [1] "liv"
## [1] 112 158
## [1] "badi"
## [1] 116 254
## [1] "wadi"
## [1] 109 150

3.2 Dimensions (without reference samples)

N <- list()
for(i in 1:length(pass1a.0.nr)){
  print(tis[[i]])
  N[[i]] <- dim(pass1a.0.nr[[i]])[1]
  print(dim(pass1a.0.nr[[i]]))
}
## [1] "pla"
## [1]  77 190
## [1] "hip"
## [1]  78 154
## [1] "gas"
## [1]  78 145
## [1] "hrt"
## [1]  78 144
## [1] "kid"
## [1]  78 166
## [1] "lun"
## [1]  78 193
## [1] "liv"
## [1]  78 158
## [1] "badi"
## [1]  77 254
## [1] "wadi"
## [1]  74 150

3.3 Negative or Zero Values

confirmed: no negative or zero values

for(i in 1:length(pass1a.0.nr)){
  print(tis[[i]])
  print(min(pass1a.0[[i]],na.rm=T))
}
## [1] "pla"
## [1] 59.47415952
## [1] "hip"
## [1] 485.2202112
## [1] "gas"
## [1] 139.0807156
## [1] "hrt"
## [1] 52.41975356
## [1] "kid"
## [1] 272.77805
## [1] "lun"
## [1] 398.9688577
## [1] "liv"
## [1] 65.14356828
## [1] "badi"
## [1] 104.3620277
## [1] "wadi"
## [1] 239.4593839

3.4 Missing Features (with references)

Blank reference samples at the beginning and end

pass1a.0.f.c0 <- list()
for(i in 1:length(pass1a.0)){
  pass1a.0.f.c0[[i]] <-apply(pass1a.0[[i]],1,function(x) sum(is.na(x))) 
}

# Plot
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  plot(pass1a.0.f.c0[[i]], ylim = c(0,max(unlist(P))), main = glue("{tis[[i]]}"), xlab = 'Samples', ylab = 'Missing Values')
}

3.5 Blank Samples (without references)

No blank test samples

pass1a.0.nr.f.c0 <- list()
for(i in 1:length(pass1a.0.nr)){
  pass1a.0.nr.f.c0[[i]] <-apply(pass1a.0.nr[[i]],1,function(x) sum(is.na(x))) 
}

# Plot
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0.nr)){
  plot(pass1a.0.nr.f.c0[[i]], ylim = c(0,max(unlist(P))), main = glue("{tis[[i]]}"), xlab = 'Samples', ylab = 'Missing Values')
}

3.6 Examine Distribution of missing values (with reference samples)

pass1a.0.f.c0 <- list()
for(i in 1:length(pass1a.0.nr)){
  pass1a.0.f.c0[[i]] <- apply(pass1a.0[[i]],2,function(x) sum(is.na(x))) 
}
# Plot
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0.nr)){
  plot(pass1a.0.f.c0[[i]], ylim = c(0,max(unlist(NR))), main = glue("{tis[[i]]}"), xlab = 'Features', ylab = 'Missing Values')
}

3.7 Examine Distribution of missing values (test samples only)

pass1a.0.nr.f.c0 <- list()
for(i in 1:length(pass1a.0.nr)){
  pass1a.0.nr.f.c0[[i]] <- apply(pass1a.0.nr[[i]],2,function(x) sum(is.na(x))) 
}
# Plot
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0.nr)){
  plot(pass1a.0.nr.f.c0[[i]], ylim = c(0,max(unlist(N))), main = glue("{tis[[i]]}"), xlab = 'Features', ylab = 'Missing Values'); abline(h = 20)
}

3.8 Remove high-missing features

Remove high missing features

rm_n <- list()
for(i in 1:length(pass1a.0.nr.f.c0)){
  print(tis[[i]])
  rm_n[[i]] <- sum(pass1a.0.nr.f.c0[[i]]>=20)
  pass1a.0[[i]] <- pass1a.0[[i]][,pass1a.0.nr.f.c0[[i]]<20]
  pass1a.0.nr[[i]] <- pass1a.0.nr[[i]][,pass1a.0.nr.f.c0[[i]]<20]
  print(dim(pass1a.0[[i]]))
  print(dim(pass1a.0.nr[[i]]))
}
## [1] "pla"
## [1] 101 190
## [1]  77 190
## [1] "hip"
## [1] 117 153
## [1]  78 153
## [1] "gas"
## [1]  99 145
## [1]  78 145
## [1] "hrt"
## [1] 120 144
## [1]  78 144
## [1] "kid"
## [1] 120 166
## [1]  78 166
## [1] "lun"
## [1] 120 188
## [1]  78 188
## [1] "liv"
## [1] 112 158
## [1]  78 158
## [1] "badi"
## [1] 116 224
## [1]  77 224
## [1] "wadi"
## [1] 109 148
## [1]  74 148

3.9 Take the log2

pass1a.0.1 <- pass1a.0.nr1 <- list()
for(i in 1:length(pass1a.0)){
  pass1a.0.1[[i]] <- log(pass1a.0[[i]], 2)
  pass1a.0.nr1[[i]] <- log(pass1a.0.nr[[i]], 2)
}

3.10 Total missing values (includes reference samples)

for(i in 1:length(pass1a.0.1)){
  print(glue("{tis[[i]]}"))
  print(sum(is.na(pass1a.0.1[[i]])))
}
## pla
## [1] 8
## hip
## [1] 847
## gas
## [1] 25
## hrt
## [1] 328
## kid
## [1] 628
## lun
## [1] 976
## liv
## [1] 435
## badi
## [1] 1150
## wadi
## [1] 639

3.11 Total missing values (test samples)

feature_impute <- list()
for(i in 1:length(pass1a.0.nr1)){
  print(tis[[i]])
  print(sum(is.na(pass1a.0.nr1[[i]])))
  feature_impute[[i]] <- apply(is.na(pass1a.0.nr1[[i]]),2,sum)[apply(is.na(pass1a.0.nr1[[i]]),2,sum) > 0]
}
## [1] "pla"
## [1] 7
## [1] "hip"
## [1] 36
## [1] "gas"
## [1] 23
## [1] "hrt"
## [1] 23
## [1] "kid"
## [1] 18
## [1] "lun"
## [1] 4
## [1] "liv"
## [1] 60
## [1] "badi"
## [1] 1
## [1] "wadi"
## [1] 48

3.12 Confirm New Distribution of missing features (test samples only)

pass1a.0.nr1.f.c0 <- list()
for(i in 1:length(pass1a.0.nr1)){
  pass1a.0.nr1.f.c0[[i]] <- apply(pass1a.0.nr1[[i]],2,function(x) sum(is.na(x))) 
}

# Plot
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0.nr1)){
  plot(pass1a.0.nr1.f.c0[[i]], ylim = c(0,max(unlist(NR))), main = glue("{tis[[i]]}"), xlab = 'Features', ylab = 'Missing Values'); abline(h = 20)
}

3.13 Log Variables (2)

# NR
# N
# P
neg_vals <- 0
zero_vals <- 0
feature_na_filter <- 20
knn_k <- 10


P1 <- NR1 <- N1 <- na_vals_impute <- list()
for(i in 1:length(pass1a.0)){
  print(tis[[i]])
  P1[[i]] <- dim(pass1a.0.nr[[i]])[2]
  NR1[[i]] <- dim(pass1a.0[[i]])[1]
  N1[[i]] <- dim(pass1a.0.nr[[i]])[1]
  na_vals_impute[[i]] <- sum(is.na(pass1a.0.nr1[[i]]))
  print(feature_impute[[i]])
}
## [1] "pla"
## glutamate-13C5 [iSTD] 
##                     7 
## [1] "hip"
##       Aminobutyric acid                  Valine         N-Acetylleucine 
##                       3                       3                       3 
##       N(2)-Acetyllysine   N-Acetylglutamic acid      N-Acetylmethionine 
##                       3                       3                       3 
##    5-Hydroxy-tryptophan       1-Methyladenosine N2,N2-Dimethylguanosine 
##                       3                      11                       3 
##                DG(34:2) 
##                       1 
## [1] "gas"
## Indoleacetic acid  Palmitoleic acid          Oleamide          MG(18:1) 
##                 1                 1                 1                 1 
##         SM(d38:1) 
##                19 
## [1] "hrt"
##            Niacinamide            Citric acid                Ser-Leu 
##                      1                      1                      1 
## gamma-Glutamyltyrosine               Cortisol  25-Hydroxycholesterol 
##                      1                      8                      3 
##               PC(33:2)              Thyroxine 
##                      4                      4 
## [1] "kid"
##          N-Acetylserine      Car(2:0)-D3 [iSTD]    5-Hydroxy-tryptophan 
##                       3                       1                       1 
##               CAR(11:0) Sphingosine 1-phosphate              L-Urobilin 
##                       2                       2                       2 
##                PC(33:2)               Thyroxine 
##                       5                       2 
## [1] "lun"
## SM(d40:2) SM(d42:2) 
##         2         2 
## [1] "liv"
##     Car(2:0)-D3 [iSTD]             Kynurenine               CAR(5:1) 
##                      1                      9                      2 
## gamma-Glutamyltyrosine              LPC(15:0)  Taurodeoxycholic acid 
##                      2                     13                      3 
##  ATP-13C10_15N5 [iSTD]      Mesobilirubinogen              SM(d40:2) 
##                      3                     12                     15 
## [1] "badi"
## N-Acetylglycine 
##               1 
## [1] "wadi"
##        Spermidine           Guanine Indoleacetic acid            Hexose 
##                 1                 1                 3                11 
##           Ile-Val          Cytidine          CAR(5:1)           Leu-Ile 
##                 4                 1                 4                 4 
##   C16 Sphinganine       Sphingosine          MG(14:0)         CAR(14:1) 
##                 2                 1                 1                 2 
##         CAR(16:2)         CAR(16:1)  Glycocholic acid          PC(33:2) 
##                 1                 1                 4                 7

4 Imputation

4.1 Imputation (test samples)

pass1a.0.nr2 <- list()
for(i in 1:length(pass1a.0)){
  if(na_vals_impute[[i]] > 0){
    print(tis[[i]])
    glue("Features & Values to impute:")
    feature_impute[[i]]
    pass1a.0.nr2[[i]] <-impute.knn(pass1a.0.nr1[[i]],k=10)$data
    #view the features before and after imputation
    par(mfrow=c(2,1),bg="black")
    image(as.matrix(pass1a.0.nr1[[i]][,pass1a.0.nr1.f.c0[[i]]>0]),col=redblue100,axes=F)
    image(as.matrix(pass1a.0.nr2[[i]][, pass1a.0.nr1.f.c0[[i]]>0]),col=redblue100,axes=F)
    par(mfrow=c(1,1) ,bg="white")
    glue("Verified no missing values: {sum(is.na(pass1a.0.nr2[[i]]))}")  #verified 0
  }else{
    print('No missing values to impute')
    pass1a.0.nr2[[i]] <- pass1a.0.nr1[[i]]
  }
}
## [1] "pla"

## [1] "hip"

## [1] "gas"

## [1] "hrt"

## [1] "kid"

## [1] "lun"

## [1] "liv"

## [1] "badi"

## [1] "wadi"

5 NxN heatmaps

5.1 Plotting NxN heatmaps (+ reference samples)

a <- list(0.93,0.90,0.88,
       0.85,0.90,0.93,
       0.89,0.91,0.80)
b <- 1
par(mfrow=c(3,3), mar = c(0,0,3,0))
i <- 1
for(i in 1:length(pass1a.0)){
  print(tis[[i]])
  x <- tmp.ref1a[[i]][,1]
  sidebar  <- ( (b - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  sidebar <- cbind(sidebar,sidebar,sidebar)
  cor.tmp<-cor(t(pass1a.0.1[[i]]),method="spearman",use="pairwise.complete.obs") #includes ref samples
  print(dim(cor.tmp))
  image(cbind(cor.tmp,sidebar),
            col=redblue100,axes=FALSE,zlim=c(a[[i]],1), main=glue("{TIS[[i]]}2-{NR1[[i]]},
                                                                  run-order, z=({a[[i]]},1)"), asp = 1)
}
## [1] "pla"
## [1] 101 101
## [1] "hip"
## [1] 117 117
## [1] "gas"
## [1] 99 99
## [1] "hrt"
## [1] 120 120
## [1] "kid"
## [1] 120 120
## [1] "lun"
## [1] 120 120
## [1] "liv"
## [1] 112 112
## [1] "badi"
## [1] 116 116
## [1] "wadi"
## [1] 109 109

# if(knit_time){
#   rppos_pass1a.0.order[[i]] %>%
#     arrange(sample_order) %>%
#     select(sample_id, sample_type, sample_order) %>%
#     knitr::kable(format = "html") %>%
#     scroll_box(width = "100%", height = "400px")
# }

5.2 Plotting NxN heatmaps (test samples) and Identification of Outlier Samples

cor.tmp <- o.s <- list()
par(mfrow=c(3,3), mar = c(0,0,3,0))
for(i in 1:length(pass1a.0)){
  x <- tmp.sample1a[[i]][,2]
  sidebar  <- ( (b - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  sidebar <- cbind(sidebar,sidebar,sidebar)
  cor.tmp[[i]]<-cor(t(pass1a.0.nr1[[i]]),method="spearman",use="pairwise.complete.obs")
  glue("Verified {N[[i]]} test samples: {dim(cor.tmp)[1]}") #verified, =78
  image(
    cbind(cor.tmp[[i]][order(tmp.sample1a[[i]][,2]),],sidebar),
    col=redblue100,axes=FALSE, zlim=c(a[[i]],1), main=glue("{TIS[[i]]}2-{N1[[i]]}, 
                                                           Run-Order, z=({a[[i]]},1)"), asp = 1)
}

# Outlier sample filter
o.f <- list(0.962, 0.96, 0.95,
            0.89, NA, NA,
            0.934, 0.955, 0.90)
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  plot(apply(cor.tmp[[i]][order(tmp.sample1a[[i]][,2]),order(tmp.sample1a[[i]][,2])],1,median), main = glue("{tis[[i]]}"),
       xlab = 'Samples (Run-Order)', ylab = 'Cor Medians'); abline(h=o.f[[i]], col = 'blue')
}

# Determine which samples are outliers
for(i in 1:length(pass1a.0)){
  if(is.na(o.f[[i]])){
    o.s[[i]] <- NA
  }else{
    o.s[[i]] <- colnames(cor.tmp[[i]])[apply(cor.tmp[[i]],1,median)<o.f[[i]]]
  }
  glue("Outlier Samples: {length(o.s[[i]])}")
}
# Examine outliers
glue("Outlier Samples in {TIS[[1]]}:")
## Outlier Samples in PLA:
rppos_pass1a.0.order[[1]] %>% filter(sample_id %in% o.s[[1]]) %>%
  knitr::kable(format = "html") %>% scroll_box(width = "100%", height = "100%")
sample_id sample_type sample_order Key.anirandgroup Registration.sex
90122013104 Sample 58 Exercise - 4 hr 1
glue("Outlier Samples in {TIS[[2]]}:")
## Outlier Samples in HIP:
rppos_pass1a.0.order[[2]] %>% filter(sample_id %in% o.s[[2]]) %>%
  knitr::kable(format = "html") %>% scroll_box(width = "100%", height = "100%")
sample_id sample_type sample_order Key.anirandgroup Registration.sex
90014015206 Sample 32 Exercise - 4 hr 1
90009015206 Sample 71 Exercise - IPE 2
glue("Outlier Samples in {TIS[[3]]}:")
## Outlier Samples in GAS:
rppos_pass1a.0.order[[3]] %>% filter(sample_id %in% o.s[[3]]) %>%
  knitr::kable(format = "html") %>% scroll_box(width = "100%", height = "100%")
sample_id sample_type sample_order Key.anirandgroup Registration.sex
90112015506 Sample 12 Exercise - 24 hr 1
90045015506 Sample 66 Exercise - IPE 1
90009015506 Sample 90 Exercise - IPE 2
glue("Outlier Samples in {TIS[[4]]}:")
## Outlier Samples in HRT:
rppos_pass1a.0.order[[4]] %>% filter(sample_id %in% o.s[[4]]) %>%
  knitr::kable(format = "html") %>% scroll_box(width = "100%", height = "100%")
sample_id sample_type sample_order Key.anirandgroup Registration.sex
90014015807 Sample 27 Exercise - 4 hr 1
90010015807 Sample 108 Exercise - IPE 1
glue("Outlier Samples in {TIS[[5]]}:")
## Outlier Samples in KID:
rppos_pass1a.0.order[[5]] %>% filter(sample_id %in% o.s[[5]]) %>%
  knitr::kable(format = "html") %>% scroll_box(width = "100%", height = "100%")
sample_id sample_type sample_order Key.anirandgroup Registration.sex
glue("Outlier Samples in {TIS[[6]]}:")
## Outlier Samples in LUN:
rppos_pass1a.0.order[[6]] %>% filter(sample_id %in% o.s[[6]]) %>%
  knitr::kable(format = "html") %>% scroll_box(width = "100%", height = "100%")
sample_id sample_type sample_order Key.anirandgroup Registration.sex
glue("Outlier Samples in {TIS[[7]]}:")
## Outlier Samples in LIV:
rppos_pass1a.0.order[[7]] %>% filter(sample_id %in% o.s[[7]]) %>%
  knitr::kable(format = "html") %>% scroll_box(width = "100%", height = "100%")
sample_id sample_type sample_order Key.anirandgroup Registration.sex
90134016805 Sample 100 Exercise - 0.5 hr 1
glue("Outlier Samples in {TIS[[8]]}:")
## Outlier Samples in BADI:
rppos_pass1a.0.order[[8]] %>% filter(sample_id %in% o.s[[8]]) %>%
  knitr::kable(format = "html") %>% scroll_box(width = "100%", height = "100%")
sample_id sample_type sample_order Key.anirandgroup Registration.sex
90010016906 Sample 34 Exercise - IPE 1
90127016906 Sample 46 Control - IPE 2
glue("Outlier Samples in {TIS[[9]]}:")
## Outlier Samples in WADI:
rppos_pass1a.0.order[[9]] %>% filter(sample_id %in% o.s[[9]]) %>%
  knitr::kable(format = "html") %>% scroll_box(width = "100%", height = "100%")
sample_id sample_type sample_order Key.anirandgroup Registration.sex
90011017005 Sample 33 Exercise - 1 hr 2
90013017005 Sample 57 Exercise - 4 hr 2

5.3 Visualize the Sex-Time-Control NxN Heatmap

par(mfrow=c(3,3), mar = c(0,0,3,0))
for(i in 1:length(pass1a.0)){
  x <- tmp.sample1a[[i]][,3]
  sex.type  <- ( (b - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  x <- tmp.sample1a[[i]][,4]
  control.type  <- ( (b - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  x <- as.factor(tmp.sample1a[[i]][,5]) %>% as.numeric()
  time.type  <- ( (b - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  sidebar<-cbind(sex.type,sex.type,sex.type, 
               control.type,control.type,control.type, 
               time.type,time.type,time.type)
  image(
    cbind(
    cor.tmp[[i]][order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5]),
          order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5])],
   sidebar[order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5]),]),
    col=redblue100,axes=FALSE, zlim=c(a[[i]],1), main=glue("{TIS[[i]]}2-{N[[i]]}, 
                                                           Sex-Control-Time, z=({a[[i]]},1)"), asp = 1)
}

par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  plot(apply(cor.tmp[[i]][order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5]), 
                         order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5])],1,median), main = glue("{tis[[i]]}"))
}

5.4 NxN Heatmap: Test and Reference Samples by Run Order

cor.tmp1 <- list()
par(mfrow=c(3,3), mar = c(0,0,3,0))
for(i in 1:length(pass1a.0)){
  x <- is.na(tmp.ref1a[[i]][,2]) %>% as.integer()
  sample.type  <- ( (b - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  sidebar<-cbind(sample.type,sample.type,sample.type,sample.type,sample.type)
  cor.tmp1[[i]]<-cor(t(pass1a.0.1[[i]]),method="spearman",use="pairwise.complete.obs") #includes ref samples
  image(
    cbind(cor.tmp1[[i]], sidebar),
    col=redblue100,axes=FALSE, zlim=c(a[[i]],1), main=glue("{TIS[[i]]}2, 
                                                         Run-Order (+Ref-type), z=({a[[i]]},1)"), asp = 1)
}

5.5 Log Variables (3)

run_var <- list(0, 0, 0,
                0, 0, 0,
                0, 0, 0)
outlier_sample_n <- list( length(o.s[[1]]),length(o.s[[2]]),length(o.s[[3]]),
                         length(o.s[[4]]),length(o.s[[5]]),length(o.s[[6]]),
                         length(o.s[[7]]),length(o.s[[8]]),length(o.s[[9]]) )
outlier_samples <- o.s
outlier_filter <- o.f

6 N-P Heatmaps, Median and SD Distributions, & Transformations

6.0.1 N-P Heatmaps: Log2, imputed, sample order,

Note: Samples as columns Sidebar: p-values, t-values, and sig (binary) comparing normal test samples and outlier test samples

n.s <- hmo <- list()
par(mfrow=c(3,3), mar = c(0,0,3,0))
for(i in 1:length(pass1a.0)){
  n.s[[i]] <- row.names(pass1a.0.nr2[[i]])[!row.names(pass1a.0.nr2[[i]]) %in% o.s[[i]]]
  hmo[[i]] <- heatmap(pass1a.0.nr2[[i]])$colInd
}

for(i in 1:length(pass1a.0)){
  image(pass1a.0.nr2[[i]][order(tmp.sample1a[[i]][,2]),hmo[[i]]]
      ,col=redblue100,axes=F,main=glue("{TIS[[i]]}2, 
                                       f{P1[[i]]}-HC, Run-Order"), asp = 1)
}

par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  plot(apply(pass1a.0.nr2[[i]][order(tmp.sample1a[[i]][,2]),hmo[[i]]],1,median), main = glue("{tis[[i]]}"))
}

6.0.2 N-P Heatmap: Log2, imputed, Sex-Control-Time order (2)

Note: pass1a.0.2 and pass1a.0.nr2 represent knn-imputed and log2 Note: Samples as columns

par(mfrow=c(3,3), mar = c(0,0,3,0))
for(i in 1:length(pass1a.0)){
  a[[i]] <- range(pass1a.0.nr2[[i]])[1]
  b[[i]] <- range(pass1a.0.nr2[[i]])[2]
  x <- tmp.sample1a[[i]][,3]
  sex.type  <- ( (b[[i]] - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  x <- tmp.sample1a[[i]][,4]
  control.type <- ( (b[[i]] - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  x <- as.factor(tmp.sample1a[[i]][,5]) %>% as.numeric()
  time.type  <- ( (b[[i]] - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  sidebar<-cbind(sex.type,sex.type,sex.type,sex.type,sex.type, 
               control.type,control.type, control.type, control.type, control.type,
               time.type, time.type, time.type, time.type, time.type)
  image(
    cbind(pass1a.0.nr2[[i]][order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5]), hmo[[i]] ], 
      sidebar[order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5]),]),
      col=redblue100,axes=F,main=glue("{TIS[[i]]}2, 
                                    f{P1[[i]]}-HC, Sex-Control-Time"), asp =1)
}

par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  plot(apply(pass1a.0.nr2[[i]][order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5]),
     hmo[[i]]],1,median), main = glue("{tis[[i]]}"))
}

6.0.3 Transformations – only for the test samples. This can be revised to remove outlier samples

Strategy is not functionally programed (must revert log transformed back to linear for pass1a.0.3a)

pass1a.0.3a<-pass1a.0.3b<-pass1a.03c<-pass1a.03c2<-pass1a.02b<-pass1a.03d<-pass1a.03d2<-pass1a.02b2<-pass1a.03d3 <- list()
for(i in 1:length(pass1a.0)){
  # pass1a.0.r3a<-pass1a.0.r3b<-pass1a.0.r3c<-pass1a.0.r3c2<-pass1a.0.r2b<-pass1a.0.r3d<-pass1a.0.r3d2<-pass1a.0.r2b2<-pass1a.0.r3d3 <-pass1a.0.2
  pass1a.0.3a[[i]]<-pass1a.0.3b[[i]]<-pass1a.03c[[i]]<-pass1a.03c2[[i]]<-pass1a.02b[[i]]<-pass1a.03d[[i]]<-pass1a.03d2[[i]]<-pass1a.02b2[[i]]<-pass1a.03d3[[i]]<-pass1a.0.nr2[[i]]
  #pass1a.03c3<-pass1a.0.3b2<-pass1a.0.nr2
}

6.0.4 Examine the median and mean distributions (2)

par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  tmp.s.median <- apply(pass1a.0.nr2[[i]],1, median)
  tmp.s.mean <- apply(pass1a.0.nr2[[i]],1, mean)
  plot(tmp.s.median,tmp.s.mean, asp = 1, main = glue("{tis[[i]]}")); abline(0,1)
}

6.0.5 Examine the median and sd distribution (1)

tmp.f.median <- tmp.f.sd <- list()
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  tmp.f.median[[i]] <- apply(2^pass1a.0.nr2[[i]],2, median)
  tmp.f.sd[[i]] <- apply(2^pass1a.0.nr2[[i]],2, sd)
  plot(y = tmp.f.sd[[i]], x = tmp.f.median[[i]],log="xy", main = glue("{tis[[i]]}"))
}

6.0.6 Examine the median and sd distribution (1; w/wo outlier samples)

tmp2.f.median <- list()
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  tmp <- 2^pass1a.0.nr2[[i]][n.s[[i]],]
  #dim(tmp)
  tmp2.f.median[[i]] <- apply(tmp,2, median)
  #tmp2.f.sd <- apply(tmp,2, sd)
  plot(tmp.f.median[[i]],tmp2.f.median[[i]],log="xy", main = glue("{tis[[i]]}"))
  #plot(tmp.f.sd,tmp2.f.sd,log="xy")
}

tmp2.f.sd <- list()
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  tmp <- 2^pass1a.0.nr2[[i]][n.s[[i]],]
  tmp2.f.sd[[i]] <- apply(tmp,2, sd)
  plot(tmp.f.sd[[i]],tmp2.f.sd[[i]],log="xy",main = glue("{tis[[i]]}"))
}

6.1 Center by median, scale by standard deviation across features (3a)

Verification of the first feature

par(mfrow=c(3,3), mar = c(2,3,2,0))
j <- 148
for(i in 1:length(pass1a.0)){
  for (j in 1:length(tmp.f.median[[i]])) {
    pass1a.0.3a[[i]][,j]<-(2^pass1a.0.nr2[[i]][,j] - tmp.f.median[[i]][j])/ tmp.f.sd[[i]][j]
  }
  plot(2^pass1a.0.nr2[[i]][,1],pass1a.0.3a[[i]][,1], main = glue("{tis[[i]]}")) #spot-check, verified
}

6.1.1 N-P Heatmap: median-centered, sd-scaled imputed, run order (3a)

hmo2 <- list()
for(i in 1:length(pass1a.0)){
  hmo2[[i]] <- heatmap(pass1a.0.3a[[i]])$colInd
}

# Run Order; Original HC
###########################
par(mfrow=c(3,3), mar = c(0,0,3,0))
for(i in 1:length(pass1a.0)){
  image(pass1a.0.3a[[i]][order(tmp.sample1a[[i]][,2]), hmo[[i]]],
    col=redblue100,axes=F,main=glue("{TIS[[i]]}3a, 
                                    f{P1[[i]]}-HC, Run-Order"), asp = 1)
}

# Run Order; New HC
###########################
par(mfrow=c(3,3), mar = c(0,0,3,0))
for(i in 1:length(pass1a.0)){
  image(pass1a.0.3a[[i]][order(tmp.sample1a[[i]][,2]), hmo2[[i]]],
    col=redblue100,axes=F,main=glue("{TIS[[i]]}3a, 
                                    f{P1[[i]]}-HC(3a), Run-Order"), asp = 1)
}

par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  plot(apply(pass1a.0.3a[[i]][order(tmp.sample1a[[i]][,2]),
     hmo[[i]]],1,median), main = glue("{tis[[i]]}"), ylim = c(-2,2)); abline(h = 0, col = 'blue')
}

6.1.2 Examine the median and sd distribution (2)

tmp.f.median2 <- tmp.f.sd2 <- list()
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  tmp.f.median2[[i]] <- apply(pass1a.0.nr2[[i]],2, median)
  tmp.f.sd2[[i]] <- apply(pass1a.0.nr2[[i]],2, sd)
  plot(y = tmp.f.sd2[[i]], x = tmp.f.median2[[i]],log="xy", main = glue("{tis[[i]]}"))
}

6.1.3 Examine the median and sd distribution (2; w/wo outlier samples)

tmp2.f.median2 <- list()
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  tmp <-pass1a.0.nr2[[i]][n.s[[i]],]
  #dim(tmp)
  tmp2.f.median2[[i]] <- apply(tmp,2, median)
  plot(tmp.f.median2[[i]],tmp2.f.median2[[i]],log="xy", main = glue("{tis[[i]]}"))
}

tmp2.f.sd2 <- list()
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  tmp <- pass1a.0.nr2[[i]][n.s[[i]],]
  tmp2.f.sd2[[i]] <- apply(tmp,2, sd)
  plot(tmp.f.sd2[[i]],tmp2.f.sd2[[i]],log="xy",main = glue("{tis[[i]]}"))
}

6.2 Log2, Center by median, scale by standard deviation across features (3b)

Verification of the first & last feature

for(i in 1:length(pass1a.0)){
  for (j in 1:length(tmp2.f.median[[i]])) {
    pass1a.0.3b[[i]][,j]<-(pass1a.0.nr2[[i]][,j]- tmp.f.median2[[i]][j])/tmp.f.sd2[[i]][j]
  }
}
par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  plot(pass1a.0.nr2[[i]][,1],pass1a.0.3b[[i]][,1], main = glue("{tis[[i]]}")) #spot-check, verified
}

for(i in 1:length(pass1a.0)){
  plot(pass1a.0.nr2[[i]][,P1[[i]]],pass1a.0.3b[[i]][,P1[[i]]], main = glue("{tis[[i]]}")) #spot-check, verified
}

6.2.1 N-P Heatmap: median-centered, sd-scaled imputed, run order (3b)

hmo3 <- list()
for(i in 1:length(pass1a.0)){
  hmo3[[i]] <- heatmap(pass1a.0.3b[[i]])$colInd
}

# Run Order; Original HC
###########################
par(mfrow=c(3,3), mar = c(0,0,3,0))
for(i in 1:length(pass1a.0)){
  image(pass1a.0.3b[[i]][order(tmp.sample1a[[i]][,2]), hmo[[i]]],
    col=redblue100,axes=F,main=glue("{TIS[[i]]}3b, 
                                    f{P1[[i]]}-HC, Run-Order"), asp = 1)
}

# Run Order; New HC
###########################
par(mfrow=c(3,3), mar = c(0,0,3,0))
for(i in 1:length(pass1a.0)){
  image(pass1a.0.3b[[i]][order(tmp.sample1a[[i]][,2]), hmo3[[i]]],
    col=redblue100,axes=F,main=glue("{TIS[[i]]}3b, 
                                    f{P1[[i]]}-HC(3b), Run-Order"), asp = 1)
}

par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  plot(apply(pass1a.0.3b[[i]][order(tmp.sample1a[[i]][,2]),
     hmo[[i]]],1,median), main = glue("{tis[[i]]}"), ylim = c(-1.5,1.5)); abline(h = 0, col = 'blue')
}

6.2.2 N-P Heatmap: median-centered, sd-scaled imputed, Sex-Control-Time order (3b)

par(mfrow=c(3,3), mar = c(0,0,3,0))
for(i in 1:length(pass1a.0)){
  a[[i]] <- range(pass1a.0.3b[[i]])[1]
  b[[i]] <- range(pass1a.0.3b[[i]])[2]
  x <- tmp.sample1a[[i]][,3]
  sex.type  <- ( (b[[i]] - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  x <- tmp.sample1a[[i]][,4]
  control.type <- ( (b[[i]] - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  x <- as.factor(tmp.sample1a[[i]][,5]) %>% as.numeric()
  time.type  <- ( (b[[i]] - a[[i]]) * ((x - min(x))/((max(x) - min(x)))) + a[[i]]) 
  sidebar<-cbind(sex.type,sex.type,sex.type,sex.type,sex.type, 
               control.type,control.type, control.type, control.type, control.type,
               time.type, time.type, time.type, time.type, time.type)
  image(
    cbind(pass1a.0.3b[[i]][order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5]), hmo[[i]] ], 
      sidebar[order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5]),]),
      col=redblue100,axes=F,main=glue("{TIS[[i]]}2, 
                                    f{P1[[i]]}-HC, Sex-Control-Time"), asp =1)
}

par(mfrow=c(3,3), mar = c(2,3,2,0))
for(i in 1:length(pass1a.0)){
  plot(apply(pass1a.0.3b[[i]][order(tmp.sample1a[[i]][,3],tmp.sample1a[[i]][,4],tmp.sample1a[[i]][,5]),
     hmo[[i]]],1,median), main = glue("{tis[[i]]}"), ylim = c(-1.5,1.5)); abline(h = 0, col = 'blue')
}

6.2.3 Examine the sample median and sd distributions (3b)

par(mfrow=c(3,3), mar = c(0,2,1,0))
for(i in 1:length(pass1a.0)){
  boxplot(data.frame(t(pass1a.0.3b[[i]])), ylim = c(-4,4), main = glue("{tis[[i]]}"), xaxt = "n")
}

glue("Outlier samples removed:")
## Outlier samples removed:
par(mfrow=c(3,3), mar = c(0,2,1,0))
for(i in 1:length(pass1a.0)){
  boxplot(data.frame(t(pass1a.0.3b[[i]][n.s[[i]],])), ylim = c(-4,4), main = glue("{tis[[i]]}"), xaxt = "n")
}

6.2.4 TODO (in additional script): Collect sample medians independent of run-order outliers to determine if samples should be centered and/or scaled

6.2.5 TODO: Incorporate outlier sample removal

6.2.6 TODO: Incorporate flagged samples

6.2.7 TODO: Incorporate outlier feature removal from transformations

7 Save the processed data

7.1 Concatenate the Processing Decisions

pass1a.0.vars <- list()
comments <- list(NA, NA, NA,
                 NA, NA, NA,
                 NA, NA, NA)
pass1a.0.vars <- data.frame()
for(i in 1:length(pass1a.0)){
  feature_impute[[i]] = names(feature_impute[[i]])
  # The processing decisions
  pass1a.0.vars <- rbind(pass1a.0.vars, data.frame(ds, site, tech, 'tis' = tis[[i]], 'NR' = NR[[i]], 'N' = N[[i]], 'P' = P[[i]], 
                                   neg_vals, zero_vals, feature_na_filter, 'P1' = P1[[i]], 'NR1' = NR1[[i]], 
                                   'N1' = N1[[i]], 'imputed_features' = paste(feature_impute[[i]], collapse = '; '), 
                                   'na_vals_impute' = na_vals_impute[[i]], knn_k, 'run_var' = run_var[[i]], 
                                   'outlier_sample_n' = outlier_sample_n[[i]], 'outlier_samples' =  paste(o.s[[i]], collapse = '; '), 
                                   'outlier_filter' = o.f[[i]],  'internal_standards_n' = length(is[[i]]), 
                                   'internal_standards' = paste(is[[i]], collapse = '; '), 'comments' = comments[[i]]))
}

# visualize the processing decisions
if(knit_time){
  pass1a.0.vars %>%
    knitr::kable(format = "html") %>%
    scroll_box(width = "100%", height = "100%")
}
ds site tech tis NR N P neg_vals zero_vals feature_na_filter P1 NR1 N1 imputed_features na_vals_impute knn_k run_var outlier_sample_n outlier_samples outlier_filter internal_standards_n internal_standards comments
pass1a umich rppos pla 101 77 190 0 0 20 190 101 77 glutamate-13C5 [iSTD] 7 10 0 1 90122013104 0.962 20 valine-13C5 [iSTD]; glutamine-13C5 [iSTD]; glutamate-13C5 [iSTD]; lysine-13C6 [iSTD]; methionine-13C5 [iSTD]; Car-D9 [iSTD]; phenylalanine-13C9 [iSTD]; arginine-13C6 [iSTD]; glucose-13C6 [iSTD]; tyrosine-13C9 [iSTD]; citric acid-13C6 [iSTD]; tryptophan-15N2 [iSTD]; Car(2:0)-D3 [iSTD]; Car(3:0)-D3 [iSTD]; Car(4:0)-D3 [iSTD]; Car(8:0)-D3 [iSTD]; fructose 1,6-bisphosphate-13C6 [iSTD]; Gibberellic acid [iSTD]; Car(16:0)-D3 [iSTD]; 24-epi-Brassinolide [iSTD] NA
pass1a umich rppos hip 117 78 154 0 0 20 153 117 78 Aminobutyric acid; Valine; N-Acetylleucine; N(2)-Acetyllysine; N-Acetylglutamic acid; N-Acetylmethionine; 5-Hydroxy-tryptophan; 1-Methyladenosine; N2,N2-Dimethylguanosine; DG(34:2) 36 10 0 2 90014015206; 90009015206 0.960 0 NA
pass1a umich rppos gas 99 78 145 0 0 20 145 99 78 Indoleacetic acid; Palmitoleic acid; Oleamide; MG(18:1); SM(d38:1) 23 10 0 3 90112015506; 90045015506; 90009015506 0.950 11 glutamate-13C5 [iSTD]; Car-D9 [iSTD]; phenylalanine-13C9 [iSTD]; tyrosine-13C9 [iSTD]; tryptophan-15N2 [iSTD]; Car(3:0)-D3 [iSTD]; Car(4:0)-D3 [iSTD]; Car(8:0)-D3 [iSTD]; Gibberellic acid [iSTD]; Car(16:0)-D3 [iSTD]; 24-epi-Brassinolide [iSTD] NA
pass1a umich rppos hrt 120 78 144 0 0 20 144 120 78 Niacinamide; Citric acid; Ser-Leu; gamma-Glutamyltyrosine; Cortisol; 25-Hydroxycholesterol; PC(33:2); Thyroxine 23 10 0 2 90014015807; 90010015807 0.890 0 NA
pass1a umich rppos kid 120 78 166 0 0 20 166 120 78 N-Acetylserine; Car(2:0)-D3 [iSTD]; 5-Hydroxy-tryptophan; CAR(11:0); Sphingosine 1-phosphate; L-Urobilin; PC(33:2); Thyroxine 18 10 0 1 NA NA 17 aspartate-13C4 [iSTD]; isoleucine-13C6 [iSTD]; leucine-13C6 [iSTD]; methionine-13C5 [iSTD]; Car-D9 [iSTD]; phenylalanine-13C9 [iSTD]; tyrosine-13C9 [iSTD]; tryptophan-15N2 [iSTD]; Car(2:0)-D3 [iSTD]; Car(3:0)-D3 [iSTD]; Car(4:0)-D3 [iSTD]; Car(5:0)-D9 [iSTD]; Car(8:0)-D3 [iSTD]; Gibberellic acid [iSTD]; Car(14:0)-D9 [iSTD]; Car(16:0)-D3 [iSTD]; 24-epi-Brassinolide [iSTD] NA
pass1a umich rppos lun 120 78 193 0 0 20 188 120 78 SM(d40:2); SM(d42:2) 4 10 0 1 NA NA 23 proline-13C5 [iSTD]; valine-13C5 [iSTD]; threonine-13C4 [iSTD]; Asparagine-15N2 [iSTD]; aspartate-13C4 [iSTD]; isoleucine-13C6 [iSTD]; leucine-13C6 [iSTD]; anthranilic acid-15N [iSTD]; glutamate-13C5 [iSTD]; methionine-13C5 [iSTD]; Car-D9 [iSTD]; phenylalanine-13C9 [iSTD]; tyrosine-13C9 [iSTD]; tryptophan-15N2 [iSTD]; Car(2:0)-D3 [iSTD]; Car(3:0)-D3 [iSTD]; Car(4:0)-D3 [iSTD]; Car(5:0)-D9 [iSTD]; Car(8:0)-D3 [iSTD]; Gibberellic acid [iSTD]; Car(14:0)-D9 [iSTD]; Car(16:0)-D3 [iSTD]; 24-epi-Brassinolide [iSTD] NA
pass1a umich rppos liv 112 78 158 0 0 20 158 112 78 Car(2:0)-D3 [iSTD]; Kynurenine; CAR(5:1); gamma-Glutamyltyrosine; LPC(15:0); Taurodeoxycholic acid; ATP-13C10_15N5 [iSTD]; Mesobilirubinogen; SM(d40:2) 60 10 0 1 90134016805 0.934 22 alanine-13C3 [iSTD]; serine-13C3 [iSTD]; proline-13C5 [iSTD]; threonine-13C4 [iSTD]; aspartate-13C4 [iSTD]; oxoglutarate-13C5 [iSTD]; glutamine-13C5 [iSTD]; histidine-13C6 [iSTD]; Car-D9 [iSTD]; phenylalanine-13C9 [iSTD]; tyrosine-13C9 [iSTD]; citric acid-13C6 [iSTD]; tryptophan-15N2 [iSTD]; Car(2:0)-D3 [iSTD]; Car(3:0)-D3 [iSTD]; Car(4:0)-D3 [iSTD]; Car(8:0)-D3 [iSTD]; Gibberellic acid [iSTD]; AMP-13C10_15N5 [iSTD]; Car(16:0)-D3 [iSTD]; 24-epi-Brassinolide [iSTD]; ATP-13C10_15N5 [iSTD] NA
pass1a umich rppos badi 116 77 254 0 0 20 224 116 77 N-Acetylglycine 1 10 0 2 90010016906; 90127016906 0.955 24 proline-13C5 [iSTD]; valine-13C5 [iSTD]; threonine-13C4 [iSTD]; Asparagine-15N2 [iSTD]; aspartate-13C4 [iSTD]; isoleucine-13C6 [iSTD]; Leucine-13C/Isoleucine-13C6 [iSTD]; leucine-13C6 [iSTD]; anthranilic acid-15N [iSTD]; glutamate-13C5 [iSTD]; methionine-13C5 [iSTD]; Car-D9 [iSTD]; phenylalanine-13C9 [iSTD]; tyrosine-13C9 [iSTD]; tryptophan-15N2 [iSTD]; Car(2:0)-D3 [iSTD]; Car(3:0)-D3 [iSTD]; Car(4:0)-D3 [iSTD]; Car(5:0)-D9 [iSTD]; Car(8:0)-D3 [iSTD]; Gibberellic acid [iSTD]; Car(14:0)-D9 [iSTD]; Car(16:0)-D3 [iSTD]; 24-epi-Brassinolide [iSTD] NA
pass1a umich rppos wadi 109 74 150 0 0 20 148 109 74 Spermidine; Guanine; Indoleacetic acid; Hexose; Ile-Val; Cytidine; CAR(5:1); Leu-Ile; C16 Sphinganine; Sphingosine; MG(14:0); CAR(14:1); CAR(16:2); CAR(16:1); Glycocholic acid; PC(33:2) 48 10 0 2 90011017005; 90013017005 0.900 18 proline-13C5 [iSTD]; aspartate-13C4 [iSTD]; oxoglutarate-13C5 [iSTD]; glutamine-13C5 [iSTD]; glutamate-13C5 [iSTD]; Car-D9 [iSTD]; phenylalanine-13C9 [iSTD]; arginine-13C6 [iSTD]; tyrosine-13C9 [iSTD]; tryptophan-15N2 [iSTD]; Car(2:0)-D3 [iSTD]; Car(3:0)-D3 [iSTD]; Car(4:0)-D3 [iSTD]; Car(8:0)-D3 [iSTD]; Gibberellic acid [iSTD]; AMP-13C10_15N5 [iSTD]; Car(16:0)-D3 [iSTD]; 24-epi-Brassinolide [iSTD] NA

7.2 Save the Data

# pla
pla1a.0.nr1 <- pass1a.0.nr1[[1]]
pla1a.0.1 <- pass1a.0.1[[1]]
pla1a.0.nr2 <- pass1a.0.nr2[[1]]
pla1a.0.3a <- pass1a.0.3a[[1]]
pla1a.0.3b <- pass1a.0.3b[[1]]
pla1a.0.vars <- pass1a.0.vars[1,]
# save the data
save(pla1a.0.nr1, pla1a.0.1, pla1a.0.nr2, pla1a.0.3a, pla1a.0.3b, pla1a.0.vars,
  file = glue("{WD}/data/UM-rppos/UM_rppos_processed_pla1a.0.RData"))

# hip
hip1a.0.nr1 <- pass1a.0.nr1[[2]]
hip1a.0.1 <- pass1a.0.1[[2]]
hip1a.0.nr2 <- pass1a.0.nr2[[2]]
hip1a.0.3a <- pass1a.0.3a[[2]]
hip1a.0.3b <- pass1a.0.3b[[2]]
hip1a.0.vars <- pass1a.0.vars[2,]
# save the data
save(hip1a.0.nr1, hip1a.0.1, hip1a.0.nr2, hip1a.0.3a, hip1a.0.3b, hip1a.0.vars,
  file = glue("{WD}/data/UM-rppos/UM_rppos_processed_hip1a.0.RData"))

# gas
gas1a.0.nr1 <- pass1a.0.nr1[[3]]
gas1a.0.1 <- pass1a.0.1[[3]]
gas1a.0.nr2 <- pass1a.0.nr2[[3]]
gas1a.0.3a <- pass1a.0.3a[[3]]
gas1a.0.3b <- pass1a.0.3b[[3]]
gas1a.0.vars <- pass1a.0.vars[3,]
# save the data
save(gas1a.0.nr1, gas1a.0.1, gas1a.0.nr2, gas1a.0.3a, gas1a.0.3b, gas1a.0.vars,
  file = glue("{WD}/data/UM-rppos/UM_rppos_processed_gas1a.0.RData"))

# hrt
hrt1a.0.nr1 <- pass1a.0.nr1[[4]]
hrt1a.0.1 <- pass1a.0.1[[4]]
hrt1a.0.nr2 <- pass1a.0.nr2[[4]]
hrt1a.0.3a <- pass1a.0.3a[[4]]
hrt1a.0.3b <- pass1a.0.3b[[4]]
hrt1a.0.vars <- pass1a.0.vars[4,]
# save the data
save(hrt1a.0.nr1, hrt1a.0.1, hrt1a.0.nr2, hrt1a.0.3a, hrt1a.0.3b, hrt1a.0.vars,
  file = glue("{WD}/data/UM-rppos/UM_rppos_processed_hrt1a.0.RData"))

# kid
kid1a.0.nr1 <- pass1a.0.nr1[[5]]
kid1a.0.1 <- pass1a.0.1[[5]]
kid1a.0.nr2 <- pass1a.0.nr2[[5]]
kid1a.0.3a <- pass1a.0.3a[[5]]
kid1a.0.3b <- pass1a.0.3b[[5]]
kid1a.0.vars <- pass1a.0.vars[5,]
# save the data
save(kid1a.0.nr1, kid1a.0.1, kid1a.0.nr2, kid1a.0.3a, kid1a.0.3b, kid1a.0.vars,
  file = glue("{WD}/data/UM-rppos/UM_rppos_processed_kid1a.0.RData"))

# lun
lun1a.0.nr1 <- pass1a.0.nr1[[6]]
lun1a.0.1 <- pass1a.0.1[[6]]
lun1a.0.nr2 <- pass1a.0.nr2[[6]]
lun1a.0.3a <- pass1a.0.3a[[6]]
lun1a.0.3b <- pass1a.0.3b[[6]]
lun1a.0.vars <- pass1a.0.vars[6,]
# save the data
save(lun1a.0.nr1, lun1a.0.1, lun1a.0.nr2, lun1a.0.3a, lun1a.0.3b, lun1a.0.vars,
  file = glue("{WD}/data/UM-rppos/UM_rppos_processed_lun1a.0.RData"))

# liv
liv1a.0.nr1 <- pass1a.0.nr1[[7]]
liv1a.0.1 <- pass1a.0.1[[7]]
liv1a.0.nr2 <- pass1a.0.nr2[[7]]
liv1a.0.3a <- pass1a.0.3a[[7]]
liv1a.0.3b <- pass1a.0.3b[[7]]
liv1a.0.vars <- pass1a.0.vars[7,]
# save the data
save(liv1a.0.nr1, liv1a.0.1, liv1a.0.nr2, liv1a.0.3a, liv1a.0.3b, liv1a.0.vars,
  file = glue("{WD}/data/UM-rppos/UM_rppos_processed_liv1a.0.RData"))

# badi
badi1a.0.nr1 <- pass1a.0.nr1[[8]]
badi1a.0.1 <- pass1a.0.1[[8]]
badi1a.0.nr2 <- pass1a.0.nr2[[8]]
badi1a.0.3a <- pass1a.0.3a[[8]]
badi1a.0.3b <- pass1a.0.3b[[8]]
badi1a.0.vars <- pass1a.0.vars[8,]
# save the data
save(badi1a.0.nr1, badi1a.0.1, badi1a.0.nr2, badi1a.0.3a, badi1a.0.3b, badi1a.0.vars,
  file = glue("{WD}/data/UM-rppos/UM_rppos_processed_badi1a.0.RData"))

# wadi
wadi1a.0.nr1 <- pass1a.0.nr1[[9]]
wadi1a.0.1 <- pass1a.0.1[[9]]
wadi1a.0.nr2 <- pass1a.0.nr2[[9]]
wadi1a.0.3a <- pass1a.0.3a[[9]]
wadi1a.0.3b <- pass1a.0.3b[[9]]
wadi1a.0.vars <- pass1a.0.vars[9,]
# save the data
save(wadi1a.0.nr1, wadi1a.0.1, wadi1a.0.nr2, wadi1a.0.3a, wadi1a.0.3b, wadi1a.0.vars,
  file = glue("{WD}/data/UM-rppos/UM_rppos_processed_wadi1a.0.RData"))

save(pla1a.0.nr1, pla1a.0.1, pla1a.0.nr2, pla1a.0.3a, pla1a.0.3b, pla1a.0.vars,
     hip1a.0.nr1, hip1a.0.1, hip1a.0.nr2, hip1a.0.3a, hip1a.0.3b, hip1a.0.vars,
     gas1a.0.nr1, gas1a.0.1, gas1a.0.nr2, gas1a.0.3a, gas1a.0.3b, gas1a.0.vars,
     hrt1a.0.nr1, hrt1a.0.1, hrt1a.0.nr2, hrt1a.0.3a, hrt1a.0.3b, hrt1a.0.vars,
     kid1a.0.nr1, kid1a.0.1, kid1a.0.nr2, kid1a.0.3a, kid1a.0.3b, kid1a.0.vars,
     lun1a.0.nr1, lun1a.0.1, lun1a.0.nr2, lun1a.0.3a, lun1a.0.3b, lun1a.0.vars,
     liv1a.0.nr1, liv1a.0.1, liv1a.0.nr2, liv1a.0.3a, liv1a.0.3b, liv1a.0.vars,
     badi1a.0.nr1, badi1a.0.1, badi1a.0.nr2, badi1a.0.3a, badi1a.0.3b, badi1a.0.vars,
     wadi1a.0.nr1, wadi1a.0.1, wadi1a.0.nr2, wadi1a.0.3a, wadi1a.0.3b, wadi1a.0.vars,
  file = glue("{WD}/data/UM-rppos/UM_rppos_processed_pass1a.0.RData"))

8 Session Info

warnings()
session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 4.0.2 (2020-06-22)
##  os       macOS  10.16                
##  system   x86_64, darwin17.0          
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Toronto             
##  date     2021-12-15                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package      * version  date       lib source                                
##  assertthat     0.2.1    2019-03-21 [2] CRAN (R 4.0.2)                        
##  backports      1.2.1    2020-12-09 [2] CRAN (R 4.0.2)                        
##  broom          0.7.8    2021-06-24 [1] CRAN (R 4.0.2)                        
##  cachem         1.0.3    2021-02-04 [2] CRAN (R 4.0.2)                        
##  callr          3.7.0    2021-04-20 [1] CRAN (R 4.0.2)                        
##  cellranger     1.1.0    2016-07-27 [2] CRAN (R 4.0.2)                        
##  cli            3.0.1    2021-07-17 [1] CRAN (R 4.0.2)                        
##  coda           0.19-4   2020-09-30 [1] CRAN (R 4.0.2)                        
##  codetools      0.2-18   2020-11-04 [2] CRAN (R 4.0.2)                        
##  colorspace     2.0-2    2021-06-24 [1] CRAN (R 4.0.2)                        
##  crayon         1.4.1    2021-02-08 [2] CRAN (R 4.0.2)                        
##  curl           4.3.2    2021-06-23 [1] CRAN (R 4.0.2)                        
##  data.table     1.14.0   2021-02-21 [1] CRAN (R 4.0.2)                        
##  DBI            1.1.1    2021-01-15 [2] CRAN (R 4.0.2)                        
##  dbplyr         2.1.1    2021-04-06 [1] CRAN (R 4.0.2)                        
##  desc           1.3.0    2021-03-05 [1] CRAN (R 4.0.2)                        
##  devtools     * 2.4.2    2021-06-07 [1] CRAN (R 4.0.2)                        
##  digest         0.6.27   2020-10-24 [2] CRAN (R 4.0.2)                        
##  dplyr        * 1.0.7    2021-06-18 [1] CRAN (R 4.0.2)                        
##  ellipsis       0.3.2    2021-04-29 [1] CRAN (R 4.0.2)                        
##  evaluate       0.14     2019-05-28 [2] CRAN (R 4.0.1)                        
##  fansi          0.5.0    2021-05-25 [1] CRAN (R 4.0.2)                        
##  fastmap        1.1.0    2021-01-25 [2] CRAN (R 4.0.2)                        
##  forcats      * 0.5.1    2021-01-27 [2] CRAN (R 4.0.2)                        
##  fs             1.5.0    2020-07-31 [2] CRAN (R 4.0.2)                        
##  generics       0.1.0    2020-10-31 [2] CRAN (R 4.0.2)                        
##  ggfittext      0.9.1    2021-01-30 [2] CRAN (R 4.0.2)                        
##  ggplot2      * 3.3.5    2021-06-25 [1] CRAN (R 4.0.2)                        
##  glue         * 1.4.2    2020-08-27 [2] CRAN (R 4.0.2)                        
##  gridExtra      2.3      2017-09-09 [2] CRAN (R 4.0.2)                        
##  gtable         0.3.0    2019-03-25 [2] CRAN (R 4.0.2)                        
##  haven          2.3.1    2020-06-01 [2] CRAN (R 4.0.2)                        
##  highr          0.9      2021-04-16 [1] CRAN (R 4.0.2)                        
##  hms            1.1.0    2021-05-17 [1] CRAN (R 4.0.2)                        
##  htmltools      0.5.1.1  2021-01-22 [2] CRAN (R 4.0.2)                        
##  httr           1.4.2    2020-07-20 [2] CRAN (R 4.0.2)                        
##  impute       * 1.62.0   2020-04-27 [1] Bioconductor                          
##  inline         0.3.19   2021-05-31 [1] CRAN (R 4.0.2)                        
##  inspectdf      0.0.11   2021-04-02 [1] CRAN (R 4.0.2)                        
##  jsonlite       1.7.2    2020-12-09 [2] CRAN (R 4.0.2)                        
##  kableExtra   * 1.3.1    2020-10-22 [2] CRAN (R 4.0.2)                        
##  knitr          1.33     2021-04-24 [1] CRAN (R 4.0.2)                        
##  lattice        0.20-41  2020-04-02 [2] CRAN (R 4.0.2)                        
##  lifecycle      1.0.0    2021-02-15 [1] CRAN (R 4.0.2)                        
##  loo            2.4.1    2020-12-09 [1] CRAN (R 4.0.2)                        
##  lubridate      1.7.10   2021-02-26 [1] CRAN (R 4.0.2)                        
##  magrittr       2.0.1    2020-11-17 [2] CRAN (R 4.0.2)                        
##  MASS           7.3-53   2020-09-09 [2] CRAN (R 4.0.2)                        
##  matrixStats    0.60.0   2021-07-26 [1] CRAN (R 4.0.2)                        
##  memoise        2.0.0    2021-01-26 [2] CRAN (R 4.0.2)                        
##  modelr         0.1.8    2020-05-19 [2] CRAN (R 4.0.2)                        
##  MotrpacBicQC * 0.5.2    2021-06-25 [1] Github (MoTrPAC/MotrpacBicQC@43fb293) 
##  munsell        0.5.0    2018-06-12 [2] CRAN (R 4.0.2)                        
##  mvtnorm        1.1-2    2021-06-07 [1] CRAN (R 4.0.2)                        
##  naniar         0.6.1    2021-05-14 [1] CRAN (R 4.0.2)                        
##  pillar         1.6.2    2021-07-29 [1] CRAN (R 4.0.2)                        
##  pkgbuild       1.2.0    2020-12-15 [2] CRAN (R 4.0.2)                        
##  pkgconfig      2.0.3    2019-09-22 [2] CRAN (R 4.0.2)                        
##  pkgload        1.2.1    2021-04-06 [1] CRAN (R 4.0.2)                        
##  plyr           1.8.6    2020-03-03 [2] CRAN (R 4.0.2)                        
##  prettyunits    1.1.1    2020-01-24 [2] CRAN (R 4.0.2)                        
##  processx       3.5.2    2021-04-30 [1] CRAN (R 4.0.2)                        
##  progress       1.2.2    2019-05-16 [2] CRAN (R 4.0.2)                        
##  ps             1.6.0    2021-02-28 [1] CRAN (R 4.0.2)                        
##  purrr        * 0.3.4    2020-04-17 [2] CRAN (R 4.0.2)                        
##  R6             2.5.0    2020-10-28 [2] CRAN (R 4.0.2)                        
##  Rcpp           1.0.7    2021-07-07 [1] CRAN (R 4.0.2)                        
##  RcppParallel   5.1.4    2021-05-04 [1] CRAN (R 4.0.2)                        
##  readr        * 1.4.0    2020-10-05 [2] CRAN (R 4.0.2)                        
##  readxl         1.3.1    2019-03-13 [2] CRAN (R 4.0.2)                        
##  remotes        2.4.0    2021-06-02 [1] CRAN (R 4.0.2)                        
##  reprex         2.0.0    2021-04-02 [1] CRAN (R 4.0.2)                        
##  reshape2       1.4.4    2020-04-09 [2] CRAN (R 4.0.2)                        
##  rethinking   * 2.13     2021-08-13 [1] Github (rmcelreath/rethinking@2acf2fd)
##  rlang          0.4.11   2021-04-30 [1] CRAN (R 4.0.2)                        
##  rmarkdown      2.6      2020-12-14 [2] CRAN (R 4.0.2)                        
##  rprojroot      2.0.2    2020-11-15 [2] CRAN (R 4.0.2)                        
##  rstan        * 2.21.2   2020-07-27 [1] CRAN (R 4.0.2)                        
##  rstudioapi     0.13     2020-11-12 [2] CRAN (R 4.0.2)                        
##  rvest          1.0.0    2021-03-09 [1] CRAN (R 4.0.2)                        
##  scales         1.1.1    2020-05-11 [2] CRAN (R 4.0.2)                        
##  sessioninfo    1.1.1    2018-11-05 [2] CRAN (R 4.0.2)                        
##  shape          1.4.6    2021-05-19 [1] CRAN (R 4.0.2)                        
##  StanHeaders  * 2.21.0-7 2020-12-17 [1] CRAN (R 4.0.2)                        
##  stringi        1.7.3    2021-07-16 [1] CRAN (R 4.0.2)                        
##  stringr      * 1.4.0    2019-02-10 [2] CRAN (R 4.0.2)                        
##  testthat       3.0.4    2021-07-01 [1] CRAN (R 4.0.2)                        
##  tibble       * 3.1.3    2021-07-23 [1] CRAN (R 4.0.2)                        
##  tidyr        * 1.1.3    2021-03-03 [1] CRAN (R 4.0.2)                        
##  tidyselect     1.1.1    2021-04-30 [1] CRAN (R 4.0.2)                        
##  tidyverse    * 1.3.1    2021-04-15 [1] CRAN (R 4.0.2)                        
##  usethis      * 2.0.1    2021-02-10 [1] CRAN (R 4.0.2)                        
##  utf8           1.2.2    2021-07-24 [1] CRAN (R 4.0.2)                        
##  V8             3.4.2    2021-05-01 [1] CRAN (R 4.0.2)                        
##  vctrs          0.3.8    2021-04-29 [1] CRAN (R 4.0.2)                        
##  viridisLite    0.4.0    2021-04-13 [1] CRAN (R 4.0.2)                        
##  visdat         0.5.3    2019-02-15 [2] CRAN (R 4.0.2)                        
##  webshot        0.5.2    2019-11-22 [2] CRAN (R 4.0.2)                        
##  withr          2.4.2    2021-04-18 [1] CRAN (R 4.0.2)                        
##  xfun           0.24     2021-06-15 [1] CRAN (R 4.0.2)                        
##  xml2           1.3.2    2020-04-23 [2] CRAN (R 4.0.2)                        
##  yaml           2.2.1    2020-02-01 [2] CRAN (R 4.0.2)                        
## 
## [1] /Users/Alec/Library/R/4.0/library
## [2] /Library/Frameworks/R.framework/Versions/4.0/Resources/library